# -*- coding: utf-8 -*-
#@Time : 2020/8/1 19:41
#@Author : 阳某
#@File : xiachufang.py
#@Software : PyCharm
import os
from urllib.parse import urlparse
import requests
import re
# Directory (relative to the current working directory) that receives the
# downloaded images; it is created on demand by download_image().
img_dir = os.path.join(os.curdir, 'imgs')
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3775.400 QQBrowser/10.6.4208.400'
}
url = 'http://www.xiachufang.com/'

# Seconds to wait on any single HTTP request before giving up, so a stalled
# server cannot hang the script forever.
REQUEST_TIMEOUT = 10

# `<img src>` values of exactly this length are discarded.
# NOTE(review): presumably this is the length of the inline placeholder image
# the site puts on lazy-loaded tags -- confirm against the live page.
PLACEHOLDER_SRC_LENGTH = 136


def extract_image_urls(html):
    """Return the image URLs found in *html*, in page order.

    Two patterns are collected: plain ``<img src="...">`` values (with empty
    values and values of length ``PLACEHOLDER_SRC_LENGTH`` removed) followed
    by ``data-src="..." alt="`` values. The first entry of the combined list
    is then dropped.

    NOTE(review): the first entry is presumably a non-content image such as
    the site logo -- confirm. Slicing (rather than ``del lst[0]``) keeps an
    empty page from raising IndexError.
    """
    eager = [
        src
        for src in re.findall(r'<img src="(.*?)"', html, re.S)
        if len(src) not in (0, PLACEHOLDER_SRC_LENGTH)
    ]
    lazy = re.findall(r'data-src="(.*?)" alt="', html, re.S)
    return (eager + lazy)[1:]


def plan_download(img, base_dir=img_dir):
    """Work out where *img* should be fetched from and saved to.

    Returns a ``(download_url, filepath)`` tuple, or ``None`` when *img*
    cannot be downloaded safely: it is not an http(s) URL, it has no usable
    file name, or its path would resolve outside *base_dir*.

    The query string and anything after ``@`` in the path are dropped from
    both the download URL and the local file name.
    NOTE(review): the ``@...`` suffix is presumably a CDN resize directive --
    confirm.
    """
    parts = urlparse(img)
    if parts.scheme not in ('http', 'https') or not parts.netloc:
        return None

    filename = parts.path[1:].split('@')[0]
    # An empty name or one ending in a separator has nothing to open().
    if not os.path.basename(filename):
        return None

    filepath = os.path.join(base_dir, filename)
    # The file name comes from a remote page: refuse anything (``..``
    # segments, absolute paths) that would land outside base_dir.
    root = os.path.join(os.path.abspath(base_dir), '')
    if not os.path.abspath(filepath).startswith(root):
        return None

    img_url = '%s://%s/%s' % (parts.scheme, parts.netloc, filename)
    return img_url, filepath


def download_image(img_url, filepath):
    """Fetch *img_url* and write the response body to *filepath*.

    Missing parent directories are created. Raises
    ``requests.RequestException`` on connection problems, timeouts, or a
    non-success HTTP status; nothing is written in that case.
    """
    res = requests.get(img_url, timeout=REQUEST_TIMEOUT)
    # Do not save an HTML error page under an image file name.
    res.raise_for_status()

    parent = os.path.dirname(filepath)
    if parent:
        # makedirs also creates base_dir itself and any nested levels,
        # which a single os.mkdir cannot do.
        os.makedirs(parent, exist_ok=True)
    with open(filepath, 'wb') as f:
        for chunk in res.iter_content(1024):
            f.write(chunk)


def main():
    """Download every image referenced on the home page into ``img_dir``."""
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    print(r.status_code, r.reason)

    img_list = extract_image_urls(r.text)
    print(len(img_list))

    for img in img_list:
        plan = plan_download(img)
        if plan is None:
            # Truncate: skipped values may be very long inline data URIs.
            print('skipped (not a downloadable image URL): %s' % img[:80])
            continue
        img_url, filepath = plan
        try:
            download_image(img_url, filepath)
        except requests.RequestException as exc:
            # One broken image should not abort the remaining downloads.
            print('failed: %s (%s)' % (img_url, exc))


if __name__ == '__main__':
    main()